# Exercise 1: fetch part of the headline news from NetEase News (news.163.com)
import requests
from lxml import etree

# Fetch the NetEase News front page, parse it with lxml, and print the link
# text matched by two XPath expressions.
url = 'https://news.163.com'
# requests applies no timeout by default, so without one an unresponsive
# server would block this script forever; fail after 10 seconds instead.
resp = requests.get(url, timeout=10)
if resp.status_code != 200:
    raise Exception("请求失败")
html_content = resp.text

# 2020-11-15: observed that some class values in html_content change after the
# page has loaded (the page is written with a Node.js framework).
pattern1 = '//div[@class="news_default_news"]/ul[@class="top_news_ul"]/li/a/text()'
pattern2 = '//div[@class="mt35 mod_hot_rank clearfix"]/ul/li/a/text()'

tree = etree.HTML(html_content)
news1 = tree.xpath(pattern1)  # text nodes matched by pattern1
news2 = tree.xpath(pattern2)  # text nodes matched by pattern2

# Print every match from the first pattern, then every match from the second.
for i in news1:
    print(i)
for j in news2:
    print(j)

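# XPath troubleshooting approach
# When an XPath such as '//div[@class="mod_top_news2" and @id="js_top_news"]/h2/a/text()' does not match what you want, keep shortening the expression step by step to check whether the preceding tags are matched at all.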